

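"""
[Modules used]
   requests >>> HTTP request module
   docx >>> saving the document

1. Fetch the document content (as images)
2. Run text recognition (OCR) through an API
3. Save the recognized text to a document

"""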

# Standard library
from pathlib import Path

# Third-party HTTP client
import requests
# Request URL
url = 'https://wenku.baidu.com/gsearch/rec/pcviewdocrec2023'

# Request headers.
# The Cookie value is assembled from adjacent string literals (one cookie per
# line) so that it is a single header line: a raw newline inside a quoted
# string is a SyntaxError, and HTTP header values may not contain newlines.
# NOTE(review): this cookie carries a logged-in session (BDUSS). It is a
# credential and should be rotated and loaded from outside the source tree.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
    'Cookie': (
        'BIDUPSID=6E136920F79A84797E9D14F146DF1238; '
        'PSTM=1698068300; '
        'BDUSS=nMwLTMwZDQybDIxQWk2bDEza1hxdHF5eVVvMDdKTDhxTTQ1WWV0VXNNWEphbkJsSVFBQUFBJCQAAAAAAAAAAAEAAADAUr8aob4xNTc3NTEyNjg5ob8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMndSGXJ3UhlM; '
        'BDUSS_BFESS=nMwLTMwZDQybDIxQWk2bDEza1hxdHF5eVVvMDdKTDhxTTQ1WWV0VXNNWEphbkJsSVFBQUFBJCQAAAAAAAAAAAEAAADAUr8aob4xNTc3NTEyNjg5ob8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMndSGXJ3UhlM; '
        'BAIDUID=6E136920F79A8479215CDE0B7332D35F:SL=0:NR=10:FG=1; '
        'H_WISE_SIDS=39634_39648_39670_39664_39694_39676_39713_39738_39704_39683_39662_39678_39801_39817_39836; '
        'H_WISE_SIDS_BFESS=39634_39648_39670_39664_39694_39676_39713_39738_39704_39683_39662_39678_39801_39817_39836; '
        'H_PS_PSSID=39713_39704_39683_39662_39678_39817_39836_39842_39902_39909_39932_39946_39940_39938_39930; '
        'BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; '
        'BAIDUID_BFESS=6E136920F79A8479215CDE0B7332D35F:SL=0:NR=10:FG=1; '
        'BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; '
        'delPer=0; '
        'PSINO=5; '
        'BA_HECTOR=0125ah0k0lag8ga485800h201in7lt91r; '
        'ZFY=fqReFrnoBztJAwThrCqvyZOdb:A9D5gA7W2fqvKjsJn0:C; '
        'Hm_lvt_d8bfb560f8d03bbefc9bdecafc4a4bf6=1702104455; '
        'ZD_ENTRY=baidu; '
        'BCLID=11868396277048861524; '
        'BCLID_BFESS=11868396277048861524; '
        'BDSFRCVID=AVkOJexroG0qJRcqJbhJhweqd_WYvd6TDYLE391HUqCSk_LVFsQGEG0Pts1-dEu-S2MAogKKKgOTHICF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; '
        'BDSFRCVID_BFESS=AVkOJexroG0qJRcqJbhJhweqd_WYvd6TDYLE391HUqCSk_LVFsQGEG0Pts1-dEu-S2MAogKKKgOTHICF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; '
        'H_BDCLCKID_SF=tJIJ_ID2JCD3enTmbt__-P4DeNQl3MRZ56bHWh0b-bR4DPjm-PJbMb-8KfO-BMPj52OnKUT-3R5lbJ7v54on0MnQ34jUbPn43bRTLPOqMn3PMb_lQMcMhP-UyPRMWh37Wm7lMKoaMp78jR093JO4y4Ldj4oxJpOJ5JbMopCafD-hbKDwD50aentqqlQDetJyaR3f2pTvWJ5TMCo-DxnMbM4LyG5gh4vf566bK4op2IjCShPC-tnYjPCNMfcitU3BtNTEVn3q3l02VbO9e-t2ynLIhGjaW4RMW23i0h7mWpTTsxA45J7cM4IseboJLfT-0bc4KKJxbnLWeIJ9jjCMjjvLeH0jJ6ne2toDQ6rJabC3SpcDXU6qLT5X0GKO2UoI-mLHbh3Sbb5ae-bKX45UKl0njxQyQCr85av-2D5s0DnlJCnT2fonDh8S2a7MJUntBC_q3q6O5hvvOn3O3M7zLUKmDloOW-TB5bbPLUQF5l8-sq0x0bOte-bQ2a_EJ6tOtRIH_Kv55RrOfjrP-trf5DCShUFs3l5WB2Q-XPoO3KJaOxonKf6DMMIJ5UoDKpRUy6cb_fbgylRphIIlyhoaLptfBPvp0JJI32TxoUJ2-KDVeh5Gqq-KXf_ebPRiB-b9QgbA5hQ7tt5W8ncFbT7l5hKpbt-q0x-jLTnhVn0MBCK0HPonHjLBj55y3j; '
        'H_BDCLCKID_SF_BFESS=tJIJ_ID2JCD3enTmbt__-P4DeNQl3MRZ56bHWh0b-bR4DPjm-PJbMb-8KfO-BMPj52OnKUT-3R5lbJ7v54on0MnQ34jUbPn43bRTLPOqMn3PMb_lQMcMhP-UyPRMWh37Wm7lMKoaMp78jR093JO4y4Ldj4oxJpOJ5JbMopCafD-hbKDwD50aentqqlQDetJyaR3f2pTvWJ5TMCo-DxnMbM4LyG5gh4vf566bK4op2IjCShPC-tnYjPCNMfcitU3BtNTEVn3q3l02VbO9e-t2ynLIhGjaW4RMW23i0h7mWpTTsxA45J7cM4IseboJLfT-0bc4KKJxbnLWeIJ9jjCMjjvLeH0jJ6ne2toDQ6rJabC3SpcDXU6qLT5X0GKO2UoI-mLHbh3Sbb5ae-bKX45UKl0njxQyQCr85av-2D5s0DnlJCnT2fonDh8S2a7MJUntBC_q3q6O5hvvOn3O3M7zLUKmDloOW-TB5bbPLUQF5l8-sq0x0bOte-bQ2a_EJ6tOtRIH_Kv55RrOfjrP-trf5DCShUFs3l5WB2Q-XPoO3KJaOxonKf6DMMIJ5UoDKpRUy6cb_fbgylRphIIlyhoaLptfBPvp0JJI32TxoUJ2-KDVeh5Gqq-KXf_ebPRiB-b9QgbA5hQ7tt5W8ncFbT7l5hKpbt-q0x-jLTnhVn0MBCK0HPonHjLBj55y3j; '
        'Hm_lpvt_d8bfb560f8d03bbefc9bdecafc4a4bf6=1702125928; '
        'ab_sr=1.0.1_NzQyMTA2OTUwOTE5NDQyNmQ1M2U5OGUwMTM5MTUxMDg1OTExYWZhMjkzMGNiNjE5ZWFkZmE0ZGZmNzVkM2U3ZWRiZDE1ZjJhZWEzNTFlMGNlODliMzUyZmNlODgzZGYxYWE2ZWZlM2YyYzVlMDkwYzg4OGEwM2M0MDJiZGI1NDE3ZWIwMWI1ZGViM2RhMGI5NzllZGNkODgwM2M1ZGI3MWYwOTY3OGNmNjZkMzcyZjAyZmQ4OGIyNDY1OGJlZDQz'
    ),
}

# Query-string parameters sent with the request
data = {
    'docId': '57a00e3e24284b73f242336c1eb91a37f111329d',
    'query': '趣味有奖问答题目及答案',
    'recPositions': 'catalog,toplist',
}
# Send the listing request. requests applies no timeout by default, so set
# one explicitly to avoid hanging forever on a stalled connection.
reponse = requests.get(url=url, headers=headers, params=data, timeout=10)
# Surface an HTTP error directly instead of a later JSON/KeyError.
reponse.raise_for_status()
# Parse the body once and reuse it for the debug print and the loop.
payload = reponse.json()
print(payload)

# open()/write_bytes() do not create missing directories, so create it here.
img_dir = Path('img')
img_dir.mkdir(parents=True, exist_ok=True)

# Save the image referenced by each entry's 'pic' field as img/<n>.jpg,
# numbering from 1 in response order.
for num, index in enumerate(payload['data']['catalogDoc'], start=1):
    pic = index['pic']
    # NOTE(review): the same headers (including the Cookie) are sent to the
    # image URL's host — confirm that is intended.
    image_response = requests.get(url=pic, headers=headers, timeout=10)
    if not image_response.ok:
        # Skip a failed download rather than saving an error body as a .jpg.
        print(f'skip image {num}: HTTP {image_response.status_code} for {pic}')
        continue
    # Path joins with the platform separator; a literal 'img\\' prefix only
    # denotes a directory on Windows.
    (img_dir / f'{num}.jpg').write_bytes(image_response.content)
